import pandas as pd
import numpy as np
import matplotlib as mat
import seaborn as sb
import sklearn as sk
%matplotlib inline
# Load the raw feedback export and take a first look at its columns/dtypes.
df = pd.read_csv("ShoeStore_Feedback.csv")
df.head()
df.info()

# Rows without an overall experience score: inspect them, then build the
# working frame `df2` that excludes them.
score_missing = df['Overall Experience Score'].isnull()
df[score_missing].info()
df2 = df.dropna(subset=['Overall Experience Score'], axis=0)
df2.info()

# Look for remaining rows that have no store id.
df2[df2['Storeid'].isnull()]
# The five "Group ..." columns are inspected once and then removed from the
# working frame in a single drop.
group_columns = [
    'Group Merchandizer',
    'Group CityManager',
    'Group Regional_Head',
    'Group Business_Head',
    'Group Management',
]
for group_column in group_columns:
    df2[group_column].value_counts()
df2 = df2.drop(group_columns, axis=1)
# Frequency tables for the categorical columns.
df2['Language'].value_counts()

product_category = df2['Product Category']
product_category.value_counts()
product_category.nunique()

# The four rating questions are inspected in the same way.
for rating_column in ('Store Ambience', 'Store Collection',
                      'Staff Product Knowledge', 'Staff Helpfulness'):
    df2[rating_column].value_counts()
# Convert the textual rating labels to their integer score. All four rating
# columns use the same label set, so one mapping is shared between them.
# Series.map yields NaN for any value that is not a key of the mapping.
rating_labels = {
    'Very Good(4)': 4,
    'Excellent(5)': 5,
    'Good(3)': 3,
    'Fair(2)': 2,
    'Poor(1)': 1,
}
for rating_column in ('Store Ambience', 'Store Collection',
                      'Staff Product Knowledge', 'Staff Helpfulness'):
    df2[rating_column] = df2[rating_column].map(rating_labels)
# Overview plots for the whole cleaned data set.
rating_columns = ['Store Ambience', 'Store Collection',
                  'Staff Product Knowledge', 'Staff Helpfulness']

# Natural log of 'Value', stored as an extra column for plotting.
df2['Log Value'] = np.log(df2['Value'])

# One histogram per numeric column, then the four ratings stacked together.
df2.hist(bins=100, figsize=(20, 20))
df2[rating_columns].plot.hist(stacked=True, bins=100, figsize=(20, 20))

# Overall score against the raw and the log-transformed value.
overall_score = df2['Overall Experience Score']
mat.pyplot.scatter(overall_score, df2['Value'])
mat.pyplot.scatter(overall_score, df2['Log Value'])

# Pairwise correlation between the four rating questions.
rating_correlation = df2[rating_columns].corr()
sb.heatmap(rating_correlation)
import nltk
from wordcloud import WordCloud
from nltk.corpus import stopwords
# Word cloud over every customer comment in the cleaned data.

# Join the comment strings directly instead of using Series.to_string():
# to_string() is a display formatter, so it mixes index labels into the text,
# renders missing comments as "NaN", and may abbreviate long cells depending
# on pandas display options.
st = ' '.join(df2['Comment'].dropna().astype(str))
tokens = st.split()

# Drop English stop words (case-insensitive match on the whole token).
stop = set(stopwords.words('english'))
resultwords = [word for word in tokens if word.lower() not in stop]
result = ' '.join(resultwords)

# Strip digits, then discard any literal NaN placeholder tokens.
result = ''.join(ch for ch in result if not ch.isdigit())
l = [word for word in result.split() if word not in ('NaN', 'nan')]
s = ' '.join(l)

# Keep only ASCII letters and spaces. The full lowercase alphabet is listed:
# leaving out 'z' would silently delete that letter from every word.
whitelist = set('abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ')
answer = ''.join(filter(whitelist.__contains__, s))

wordcloud = WordCloud(background_color="white",
                      width=1200, height=1000).generate(answer)
mat.pyplot.figure(figsize=(30, 100))
mat.pyplot.imshow(wordcloud, interpolation="bilinear")
mat.pyplot.axis("off")
mat.pyplot.margins(x=0, y=0)
mat.pyplot.show()
# How many distinct stores are present, and how many responses each has.
store_location = df2['Store Location(Touchpoint name)']
store_location.nunique()
store_location.value_counts()
# Store drill-down: CLB-COLABA.
# .copy() makes df3 an independent frame, so adding a column below does not
# write into a slice of df2 (avoids pandas' SettingWithCopyWarning).
df3 = df2.loc[df2['Store Location(Touchpoint name)'] == 'CLB-COLABA'].copy()
df3.head()
df3['Log Value'] = np.log(df3['Value'])
df3.hist(bins=100, figsize=(20, 20))
df3[['Store Ambience', 'Store Collection', 'Staff Product Knowledge',
     'Staff Helpfulness']].plot.hist(stacked=True, bins=100, figsize=(15, 20))

# Word cloud of this store's comments.
# Join the comment strings directly instead of using Series.to_string():
# to_string() is a display formatter, so it mixes index labels into the text,
# renders missing comments as "NaN", and may abbreviate long cells depending
# on pandas display options.
st = ' '.join(df3['Comment'].dropna().astype(str))
tokens = st.split()

# Drop English stop words (case-insensitive match on the whole token).
stop = set(stopwords.words('english'))
resultwords = [word for word in tokens if word.lower() not in stop]
result = ' '.join(resultwords)

# Strip digits, then discard any literal NaN placeholder tokens.
result = ''.join(ch for ch in result if not ch.isdigit())
l = [word for word in result.split() if word not in ('NaN', 'nan')]
s = ' '.join(l)

# Keep only ASCII letters and spaces. The full lowercase alphabet is listed:
# leaving out 'z' would silently delete that letter from every word.
whitelist = set('abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ')
answer = ''.join(filter(whitelist.__contains__, s))

wordcloud = WordCloud(background_color="white",
                      width=1200, height=1000).generate(answer)
mat.pyplot.figure(figsize=(30, 100))
mat.pyplot.imshow(wordcloud, interpolation="bilinear")
mat.pyplot.axis("off")
mat.pyplot.margins(x=0, y=0)
mat.pyplot.show()
# Store drill-down: HYD-HYDERABAD.
# .copy() makes df4 an independent frame, so adding a column below does not
# write into a slice of df2 (avoids pandas' SettingWithCopyWarning).
df4 = df2.loc[df2['Store Location(Touchpoint name)'] == 'HYD-HYDERABAD'].copy()
df4.head()
df4['Log Value'] = np.log(df4['Value'])
df4.hist(bins=100, figsize=(20, 20))
df4[['Store Ambience', 'Store Collection', 'Staff Product Knowledge',
     'Staff Helpfulness']].plot.hist(stacked=True, bins=100, figsize=(15, 20))

# Word cloud of this store's comments.
# Join the comment strings directly instead of using Series.to_string():
# to_string() is a display formatter, so it mixes index labels into the text,
# renders missing comments as "NaN", and may abbreviate long cells depending
# on pandas display options.
st = ' '.join(df4['Comment'].dropna().astype(str))
tokens = st.split()

# Drop English stop words (case-insensitive match on the whole token).
stop = set(stopwords.words('english'))
resultwords = [word for word in tokens if word.lower() not in stop]
result = ' '.join(resultwords)

# Strip digits, then discard any literal NaN placeholder tokens.
result = ''.join(ch for ch in result if not ch.isdigit())
l = [word for word in result.split() if word not in ('NaN', 'nan')]
s = ' '.join(l)

# Keep only ASCII letters and spaces. The full lowercase alphabet is listed:
# leaving out 'z' would silently delete that letter from every word.
whitelist = set('abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ')
answer = ''.join(filter(whitelist.__contains__, s))

wordcloud = WordCloud(background_color="white",
                      width=1200, height=1000).generate(answer)
mat.pyplot.figure(figsize=(30, 100))
mat.pyplot.imshow(wordcloud, interpolation="bilinear")
mat.pyplot.axis("off")
mat.pyplot.margins(x=0, y=0)
mat.pyplot.show()
# Store drill-down: AAS-AHMEDABAD.
# .copy() makes df5 an independent frame, so adding a column below does not
# write into a slice of df2 (avoids pandas' SettingWithCopyWarning).
df5 = df2.loc[df2['Store Location(Touchpoint name)'] == 'AAS-AHMEDABAD'].copy()
df5.head()
df5['Log Value'] = np.log(df5['Value'])
df5.hist(bins=100, figsize=(20, 20))
df5[['Store Ambience', 'Store Collection', 'Staff Product Knowledge',
     'Staff Helpfulness']].plot.hist(stacked=True, bins=100, figsize=(15, 20))

# Word cloud of this store's comments.
# Join the comment strings directly instead of using Series.to_string():
# to_string() is a display formatter, so it mixes index labels into the text,
# renders missing comments as "NaN", and may abbreviate long cells depending
# on pandas display options.
st = ' '.join(df5['Comment'].dropna().astype(str))
tokens = st.split()

# Drop English stop words (case-insensitive match on the whole token).
stop = set(stopwords.words('english'))
resultwords = [word for word in tokens if word.lower() not in stop]
result = ' '.join(resultwords)

# Strip digits, then discard any literal NaN placeholder tokens.
result = ''.join(ch for ch in result if not ch.isdigit())
l = [word for word in result.split() if word not in ('NaN', 'nan')]
s = ' '.join(l)

# Keep only ASCII letters and spaces. The full lowercase alphabet is listed:
# leaving out 'z' would silently delete that letter from every word.
whitelist = set('abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ')
answer = ''.join(filter(whitelist.__contains__, s))

wordcloud = WordCloud(background_color="white",
                      width=1200, height=1000).generate(answer)
mat.pyplot.figure(figsize=(30, 100))
mat.pyplot.imshow(wordcloud, interpolation="bilinear")
mat.pyplot.axis("off")
mat.pyplot.margins(x=0, y=0)
mat.pyplot.show()
# Store drill-down: CTR-COIMBATORE.
# .copy() makes df6 an independent frame, so adding a column below does not
# write into a slice of df2 (avoids pandas' SettingWithCopyWarning).
df6 = df2.loc[df2['Store Location(Touchpoint name)'] == 'CTR-COIMBATORE'].copy()
df6.head()
df6['Log Value'] = np.log(df6['Value'])
df6.hist(bins=100, figsize=(20, 20))
df6[['Store Ambience', 'Store Collection', 'Staff Product Knowledge',
     'Staff Helpfulness']].plot.hist(stacked=True, bins=100, figsize=(15, 20))

# Word cloud of this store's comments.
# Join the comment strings directly instead of using Series.to_string():
# to_string() is a display formatter, so it mixes index labels into the text,
# renders missing comments as "NaN", and may abbreviate long cells depending
# on pandas display options.
st = ' '.join(df6['Comment'].dropna().astype(str))
tokens = st.split()

# Drop English stop words (case-insensitive match on the whole token).
stop = set(stopwords.words('english'))
resultwords = [word for word in tokens if word.lower() not in stop]
result = ' '.join(resultwords)

# Strip digits, then discard any literal NaN placeholder tokens.
result = ''.join(ch for ch in result if not ch.isdigit())
l = [word for word in result.split() if word not in ('NaN', 'nan')]
s = ' '.join(l)

# Keep only ASCII letters and spaces. The full lowercase alphabet is listed:
# leaving out 'z' would silently delete that letter from every word.
whitelist = set('abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ')
answer = ''.join(filter(whitelist.__contains__, s))

wordcloud = WordCloud(background_color="white",
                      width=1200, height=1000).generate(answer)
mat.pyplot.figure(figsize=(30, 100))
mat.pyplot.imshow(wordcloud, interpolation="bilinear")
mat.pyplot.axis("off")
mat.pyplot.margins(x=0, y=0)
mat.pyplot.show()
# Store drill-down: AND-ANDHERI.
# .copy() makes df7 an independent frame, so adding a column below does not
# write into a slice of df2 (avoids pandas' SettingWithCopyWarning).
df7 = df2.loc[df2['Store Location(Touchpoint name)'] == 'AND-ANDHERI'].copy()
df7.head()
df7['Log Value'] = np.log(df7['Value'])
df7.hist(bins=100, figsize=(20, 20))
df7[['Store Ambience', 'Store Collection', 'Staff Product Knowledge',
     'Staff Helpfulness']].plot.hist(stacked=True, bins=100, figsize=(15, 20))

# Word cloud of this store's comments. The text must come from df7; reading
# another store's frame here would just reproduce that store's word cloud.
# Join the comment strings directly instead of using Series.to_string():
# to_string() is a display formatter, so it mixes index labels into the text,
# renders missing comments as "NaN", and may abbreviate long cells depending
# on pandas display options.
st = ' '.join(df7['Comment'].dropna().astype(str))
tokens = st.split()

# Drop English stop words (case-insensitive match on the whole token).
stop = set(stopwords.words('english'))
resultwords = [word for word in tokens if word.lower() not in stop]
result = ' '.join(resultwords)

# Strip digits, then discard any literal NaN placeholder tokens.
result = ''.join(ch for ch in result if not ch.isdigit())
l = [word for word in result.split() if word not in ('NaN', 'nan')]
s = ' '.join(l)

# Keep only ASCII letters and spaces. The full lowercase alphabet is listed:
# leaving out 'z' would silently delete that letter from every word.
whitelist = set('abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ')
answer = ''.join(filter(whitelist.__contains__, s))

wordcloud = WordCloud(background_color="white",
                      width=1200, height=1000).generate(answer)
mat.pyplot.figure(figsize=(30, 100))
mat.pyplot.imshow(wordcloud, interpolation="bilinear")
mat.pyplot.axis("off")
mat.pyplot.margins(x=0, y=0)
mat.pyplot.show()
# Store drill-down: BST-BANGALURU (plots plus the raw comments).
df8 = df2.loc[df2['Store Location(Touchpoint name)'] == 'BST-BANGALURU']
df8.head()
df8.hist(bins=100, figsize=(20, 20))
df8[['Store Ambience', 'Store Collection', 'Staff Product Knowledge',
     'Staff Helpfulness']].plot.hist(stacked=True, bins=100, figsize=(15, 20))

# Show full comment text instead of an abbreviated preview. None is the
# supported "no limit" value (pandas >= 1.0); the older -1 spelling is
# deprecated and rejected by pandas 2.x. The option is set before the comments
# are displayed so that it applies to them as well.
pd.set_option('display.max_colwidth', None)
df8['Comment']
# Store drill-down: ALR-ALWAR (plots plus the raw comments).
is_alwar = df2['Store Location(Touchpoint name)'] == 'ALR-ALWAR'
df9 = df2.loc[is_alwar]
df9.head()

rating_columns = ['Store Ambience', 'Store Collection',
                  'Staff Product Knowledge', 'Staff Helpfulness']
df9.hist(bins=100, figsize=(20, 20))
df9[rating_columns].plot.hist(stacked=True, bins=100, figsize=(15, 20))
df9['Comment']
# Store drill-down: MAS- MUMBAI AIR PORT( M ) (plots plus the raw comments).
is_mumbai_airport = (
    df2['Store Location(Touchpoint name)'] == 'MAS- MUMBAI AIR PORT( M )'
)
df10 = df2.loc[is_mumbai_airport]
df10.head()

rating_columns = ['Store Ambience', 'Store Collection',
                  'Staff Product Knowledge', 'Staff Helpfulness']
df10.hist(bins=100, figsize=(20, 20))
df10[rating_columns].plot.hist(stacked=True, bins=100, figsize=(15, 20))
df10['Comment']
# Store drill-down: JHR- JAMSHEDPUR (plots plus the raw comments).
is_jamshedpur = df2['Store Location(Touchpoint name)'] == 'JHR- JAMSHEDPUR'
df11 = df2.loc[is_jamshedpur]
df11.head()

rating_columns = ['Store Ambience', 'Store Collection',
                  'Staff Product Knowledge', 'Staff Helpfulness']
df11.hist(bins=100, figsize=(20, 20))
df11[rating_columns].plot.hist(stacked=True, bins=100, figsize=(15, 20))
df11['Comment']
# Store drill-down: BHR-BANGALURU (plots plus the raw comments).
is_bhr_bangaluru = df2['Store Location(Touchpoint name)'] == 'BHR-BANGALURU'
df12 = df2.loc[is_bhr_bangaluru]
df12.head()

rating_columns = ['Store Ambience', 'Store Collection',
                  'Staff Product Knowledge', 'Staff Helpfulness']
df12.hist(bins=100, figsize=(20, 20))
df12[rating_columns].plot.hist(stacked=True, bins=100, figsize=(15, 20))
df12['Comment']